home *** CD-ROM | disk | FTP | other *** search
- /*
- ** BlockMoveBits by Bob Boonstra
- **
- ** Solution strategy:
- ** Use 68030 bit field manipulation instructions
- ** rather than shifting and masking.
- ** Accomplish move in three steps, where the first step
- ** aligns destination to longword, second step uses
- ** BFEXTU/MOVE.L combination instead of BFEXTU/BFINS to
- ** move bulk of the bits, and third step cleans up.
- ** Special case when srcBitOffset==destBitOffset,
- ** allowing main loop to use MOVE.L (x)+,(y)+
- **
- ** Relative execution times for various strategies:
- ** 100: Straightforward BFEXTU/BFINS in 32-bit chunks,
- ** 70: byte-align src and MOVE.L/BFINS in main loop,
- ** 58: byte-align dst and BFEXTU/MOVE.L in main loop,
- ** 50: long-aligned dst and BFEXTU/MOVE.L in main loop,
- ** 29: as above, if srcOffset==dstOffset use one MOVE.L
- */
-
- /*
- ** Symbolic names for the 680x0 registers used by the
- ** inline assembly in BlockMoveBits:
- **   bitCt     - number of bits still to be moved
- **   srcOffset - bit field offset (bit 0 = MSB) relative to srcPtr
- **   dstOffset - bit field offset (bit 0 = MSB) relative to dstPtr
- **   srcPtr    - current source byte address
- **   dstPtr    - current destination byte address
- */
- #define bitCt d2
- #define srcOffset d6
- #define dstOffset d7
- #define srcPtr a0
- #define dstPtr a1
-
- void BlockMoveBits(char *srcBytePtr, char *destBytePtr,
- unsigned char srcBitOffset, unsigned char destBitOffset,
- unsigned short bitCount)
- {
- asm 68030 {
-
- ; save registers
-
- MOVEM.L d6-d7,-(a7)
-
- ; exit if no bits to move
-
- MOVEQ #0,bitCt
- MOVE.W bitCount,bitCt
-
- ; get params into registers
-
- MOVE.L srcBytePtr,srcPtr
- MOVE.L destBytePtr,dstPtr
- MOVE.B srcBitOffset,d1
- MOVEQ #0,d0
- MOVE.B destBitOffset,d0
-
- ; calculate srcOffset and dstOffset in
- ; bit field manipulation coordinates
- ; (bit 0 is MSB)
-
- MOVEQ #7,srcOffset
- SUB.B d1,srcOffset
- MOVEQ #7,dstOffset
- SUB.B d0,dstOffset
-
- ; exit if <= 32 bits to move
-
- CMPI.L #32,bitCt
- BLE @lastbits
-
- ; convert dstOffset to initial bit count
-
- ADDQ.W #1,d0
-
- ; STEP 1: Move enough bits to longAlign destination
- ; using bit field manipulation
-
- ; adjust bit count to longAlign destination
-
- MOVE.W dstPtr,d1
- ANDI.B #3,d1
- EORI.B #3,d1
- LSL.B #3,d1
- ADD.B d1,d0
-
- ; move initial bits
-
- BFEXTU (srcPtr){srcOffset:d0},d1
- BFINS d1,(dstPtr){dstOffset:d0}
-
- ; decrement bits left to move
-
- SUB.L d0,bitCt
-
- ; adjust source offset; this may make
- ; srcOffset >= 8, but BFEXTU does not care
-
- ADD.W d0,srcOffset
-
- ; adjust dstPtr to account for alignment
-
- LSR.B #3,d0
- ADDQ.B #1,d0
- ADDA.W d0,dstPtr
- MOVEQ #0,dstOffset
-
- ; STEP 2: Main loop, MOVE.L all 32-bit chunks
-
- ; set up d0 with number of longwords to move
-
- MOVE.W bitCt,d0
- LSR.W #5,d0
- BLE @lastbits
-
- ; set up bitCt for final BFEXTU/BFINS
-
- ANDI.W #31,bitCt
-
- ; decrement d0 for subsequent DBRA
-
- SUBQ.W #1,d0
-
- ; move bits one longword at a time
-
- MOVE.B srcOffset,d1
- ANDI.B #7,d1
- BNE.S @longloop
-
- ; special case, src is byte-aligned
-
- LSR.B #3,srcOffset
- ADDA.L srcOffset,srcPtr
- MOVEQ #0,srcOffset
-
- alignloop:
-
- MOVE.L (srcPtr)+,(dstPtr)+
- DBRA d0,@alignloop
- BRA.S @lastbits
-
- ; normal case, src not byte-aligned
-
- longloop:
-
- BFEXTU (srcPtr){srcOffset:0},d1
- MOVE.L d1,(dstPtr)+
- ADDQ.L #4,srcPtr
- DBRA d0,@longloop
-
- ; STEP 3: Move remaining bits with bit field
- ; manipulation
-
- lastbits:
-
- TST.B bitCt
- BEQ.S @done
-
- ; move leftover bits
-
- BFEXTU (srcPtr){srcOffset:bitCt},d1
- BFINS d1,(dstPtr){dstOffset:bitCt}
-
- done:
-
- ; restore registers
-
- MOVEM.L (a7)+,d6-d7
- }
- }
-